在日常的程式開發和系統管理中,我們經常需要了解磁碟空間的使用情況。今天我們將建立一個目錄分析器,它能夠遞歸地分析指定目錄的大小,並生成詳細的報告。這個專案將幫助我們學習 Rust 的文件系統操作、錯誤處理、以及數據結構的使用。
專案目標
cargo new directory_analyzer
cd directory_analyzer
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
clap = { version = "4.0", features = ["derive"] }
anyhow = "1.0"
colored = "2.0"
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
/// One node of the scanned file-system tree (a file or a directory).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
/// Path of the entry as it was reached during the scan.
pub path: PathBuf,
/// Size in bytes: the file length for files, the sum of the scanned children for directories.
pub size: u64,
/// `true` when the scanner recorded this entry as a directory, `false` for a file.
pub is_dir: bool,
/// `None` for files; for directories, the scanned child entries (may be empty).
pub children: Option<Vec<FileInfo>>,
}
/// Aggregated result of analyzing one root path.
#[derive(Debug, Serialize, Deserialize)]
pub struct DirectoryReport {
/// The path that was analyzed.
pub root_path: PathBuf,
/// Size in bytes recorded on the root node of `directory_tree`.
pub total_size: u64,
/// Number of file nodes found in `directory_tree`.
pub file_count: usize,
/// Number of directory nodes found in `directory_tree` (the root included when it is a directory).
pub dir_count: usize,
/// The largest files, biggest first (the analyzer keeps at most ten).
pub largest_files: Vec<FileInfo>,
/// The full scanned tree, rooted at `root_path`.
pub directory_tree: FileInfo,
/// Total bytes per size-category label (for example "1KB - 1MB").
pub size_distribution: HashMap<String, u64>,
}
src/analyzer.rs
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use crate::{DirectoryReport, FileInfo};
/// Recursive directory scanner configured through builder-style setters.
///
/// Create one with [`DirectoryAnalyzer::new`], optionally chain `max_depth`,
/// `follow_symlinks` and `include_hidden`, then call `analyze`.
pub struct DirectoryAnalyzer {
    /// Deepest level whose directories are still read; the analyzed root is level 0.
    /// `None` means unlimited.
    max_depth: Option<usize>,
    /// Whether symbolic links found inside a directory are scanned (through their target).
    follow_symlinks: bool,
    /// Whether entries whose name starts with `.` are scanned.
    include_hidden: bool,
}

impl Default for DirectoryAnalyzer {
    /// Same configuration as [`DirectoryAnalyzer::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl DirectoryAnalyzer {
    /// Creates an analyzer with no depth limit that skips symlinks and hidden entries.
    pub fn new() -> Self {
        Self {
            max_depth: None,
            follow_symlinks: false,
            include_hidden: false,
        }
    }

    /// Limits how deep directories are read (the root is depth 0).
    pub fn max_depth(mut self, depth: usize) -> Self {
        self.max_depth = Some(depth);
        self
    }

    /// Chooses whether symbolic links inside scanned directories are followed.
    ///
    /// No cycle detection is performed, so a link pointing back to an ancestor
    /// is only stopped by `max_depth` or by an I/O error.
    pub fn follow_symlinks(mut self, follow: bool) -> Self {
        self.follow_symlinks = follow;
        self
    }

    /// Chooses whether entries whose name starts with `.` are included.
    pub fn include_hidden(mut self, include: bool) -> Self {
        self.include_hidden = include;
        self
    }

    /// Scans `path` and builds the full report.
    ///
    /// # Errors
    ///
    /// Fails when `path` does not exist or when the root itself cannot be read.
    /// Unreadable entries below the root are reported on stderr and skipped.
    pub fn analyze<P: AsRef<Path>>(&self, path: P) -> Result<DirectoryReport> {
        let path = path.as_ref();
        if !path.exists() {
            anyhow::bail!("路徑不存在: {}", path.display());
        }

        let directory_tree = self.scan_directory(path, 0)?;
        let stats = self.calculate_statistics(&directory_tree);

        Ok(DirectoryReport {
            root_path: path.to_path_buf(),
            total_size: directory_tree.size,
            file_count: stats.file_count,
            dir_count: stats.dir_count,
            largest_files: stats.largest_files,
            directory_tree,
            size_distribution: stats.size_distribution,
        })
    }

    /// Builds the tree node for `path`, recursing into directories.
    ///
    /// `depth` is the level of `path` relative to the analyzed root (root = 0).
    fn scan_directory(&self, path: &Path, depth: usize) -> Result<FileInfo> {
        // Look at the entry before applying the depth limit: the limit only
        // controls descending into directories, so a regular file must never be
        // reported as an empty directory of size 0.
        let metadata = fs::metadata(path)
            .with_context(|| format!("無法讀取元數據: {}", path.display()))?;

        if metadata.is_file() {
            return Ok(FileInfo {
                path: path.to_path_buf(),
                size: metadata.len(),
                is_dir: false,
                children: None,
            });
        }

        // A directory beyond the limit is kept as an unexpanded placeholder.
        if matches!(self.max_depth, Some(limit) if depth > limit) {
            return Ok(FileInfo {
                path: path.to_path_buf(),
                size: 0,
                is_dir: true,
                children: Some(vec![]),
            });
        }

        let mut children = Vec::new();
        let mut total_size = 0u64;

        let read_dir = fs::read_dir(path)
            .with_context(|| format!("無法讀取目錄: {}", path.display()))?;

        for entry in read_dir {
            let entry = entry
                .with_context(|| format!("無法讀取目錄項目: {}", path.display()))?;
            let entry_path = entry.path();

            // Hidden entries are skipped unless explicitly requested.
            if !self.include_hidden && self.is_hidden(&entry_path) {
                continue;
            }

            // Symbolic links are skipped unless explicitly requested.
            if entry_path.is_symlink() && !self.follow_symlinks {
                continue;
            }

            // Best effort: a child that cannot be scanned is reported and left out
            // instead of aborting the whole analysis.
            match self.scan_directory(&entry_path, depth + 1) {
                Ok(child_info) => {
                    total_size += child_info.size;
                    children.push(child_info);
                }
                Err(e) => {
                    eprintln!("警告: 跳過 {}: {}", entry_path.display(), e);
                }
            }
        }

        Ok(FileInfo {
            path: path.to_path_buf(),
            size: total_size,
            is_dir: true,
            children: Some(children),
        })
    }

    /// Returns `true` when the last path component starts with a dot.
    fn is_hidden(&self, path: &Path) -> bool {
        path.file_name()
            .and_then(|name| name.to_str())
            .map(|name| name.starts_with('.'))
            .unwrap_or(false)
    }

    /// Walks `tree` and returns the counts, the ten largest files and the size distribution.
    fn calculate_statistics(&self, tree: &FileInfo) -> DirectoryStats {
        let mut stats = DirectoryStats::new();
        self.collect_stats(tree, &mut stats);

        // Largest first, then keep only the top ten.
        stats.largest_files.sort_by(|a, b| b.size.cmp(&a.size));
        stats.largest_files.truncate(10);
        stats
    }

    /// Recursively accumulates `node` and its descendants into `stats`.
    fn collect_stats(&self, node: &FileInfo, stats: &mut DirectoryStats) {
        if node.is_dir {
            stats.dir_count += 1;
            if let Some(children) = &node.children {
                for child in children {
                    self.collect_stats(child, stats);
                }
            }
        } else {
            stats.file_count += 1;
            stats.largest_files.push(node.clone());

            // Add the file's bytes to its size bucket.
            let size_category = self.categorize_size(node.size);
            *stats.size_distribution.entry(size_category).or_insert(0) += node.size;
        }
    }

    /// Maps a size in bytes to its bucket label.
    ///
    /// Lower bounds are inclusive, so exactly 1024 bytes falls in "1KB - 1MB"
    /// rather than "< 1KB". Exactly 1 GiB stays in "100MB - 1GB" because the
    /// last bucket is strictly greater than 1 GiB.
    fn categorize_size(&self, size: u64) -> String {
        const KIB: u64 = 1024;
        const MIB: u64 = 1024 * KIB;
        const GIB: u64 = 1024 * MIB;

        let label = if size < KIB {
            "< 1KB"
        } else if size < MIB {
            "1KB - 1MB"
        } else if size < 100 * MIB {
            "1MB - 100MB"
        } else if size <= GIB {
            "100MB - 1GB"
        } else {
            "> 1GB"
        };
        label.to_string()
    }
}
/// Running totals gathered while walking a scanned tree.
#[derive(Debug, Default)]
struct DirectoryStats {
    /// Number of file nodes seen so far.
    file_count: usize,
    /// Number of directory nodes seen so far.
    dir_count: usize,
    /// Every file seen so far; the analyzer sorts and truncates this afterwards.
    largest_files: Vec<FileInfo>,
    /// Total bytes per size-category label.
    size_distribution: HashMap<String, u64>,
}

impl DirectoryStats {
    /// Starts with zero counts and empty collections.
    fn new() -> Self {
        Self::default()
    }
}
src/formatter.rs
use crate::{DirectoryReport, FileInfo};
use colored::*;
use std::io::Write;
/// Common interface for turning a `DirectoryReport` into printable text.
pub trait ReportFormatter {
/// Renders `report` and returns the result as a `String`.
fn format_report(&self, report: &DirectoryReport) -> String;
}
/// Human-readable report renderer with optional tree view and ANSI colours.
pub struct TextFormatter {
    /// Whether the directory tree section is rendered.
    show_tree: bool,
    /// Whether headings, directory names and sizes are coloured.
    use_colors: bool,
}

impl Default for TextFormatter {
    /// Same configuration as [`TextFormatter::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl TextFormatter {
    /// Creates a formatter that shows the tree and uses colours.
    pub fn new() -> Self {
        Self {
            show_tree: true,
            use_colors: true,
        }
    }

    /// Chooses whether the directory tree section is rendered.
    pub fn show_tree(mut self, show: bool) -> Self {
        self.show_tree = show;
        self
    }

    /// Chooses whether coloured output is produced.
    pub fn use_colors(mut self, use_colors: bool) -> Self {
        self.use_colors = use_colors;
        self
    }

    /// Formats a byte count with binary units: whole bytes below 1024,
    /// otherwise two decimals in KB, MB, GB or TB.
    fn format_size(&self, size: u64) -> String {
        const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

        let mut value = size as f64;
        let mut unit = 0;
        // Stop at the last unit so very large values stay in TB.
        while value >= 1024.0 && unit < UNITS.len() - 1 {
            value /= 1024.0;
            unit += 1;
        }

        if unit == 0 {
            format!("{} {}", size, UNITS[unit])
        } else {
            format!("{:.2} {}", value, UNITS[unit])
        }
    }

    /// Renders `node` and its descendants as an indented tree, one entry per line.
    ///
    /// `prefix` is the indentation inherited from the ancestors; `is_last` selects
    /// the connector for the final child of a directory.
    fn format_tree(&self, node: &FileInfo, prefix: &str, is_last: bool) -> String {
        let mut output = String::new();
        let connector = if is_last { "└── " } else { "├── " };

        // Paths such as "." or "/" have no final component; show the path itself
        // rather than a placeholder. Non-UTF-8 names are rendered lossily.
        let label = node
            .path
            .file_name()
            .map_or_else(|| node.path.to_string_lossy(), |n| n.to_string_lossy());
        let name: &str = &label;

        let size_str = self.format_size(node.size);
        let line = format!(
            "{}{}{} ({})",
            prefix,
            connector,
            if self.use_colors && node.is_dir {
                name.blue().bold()
            } else {
                name.normal()
            },
            if self.use_colors {
                size_str.green()
            } else {
                size_str.normal()
            }
        );
        output.push_str(&line);
        output.push('\n');

        if let Some(children) = &node.children {
            // The continuation must be as wide as the 4-column connectors so that
            // nested levels line up under their parent.
            let new_prefix = format!("{}{}", prefix, if is_last { "    " } else { "│   " });
            for (i, child) in children.iter().enumerate() {
                let is_last_child = i == children.len() - 1;
                output.push_str(&self.format_tree(child, &new_prefix, is_last_child));
            }
        }
        output
    }
}
impl ReportFormatter for TextFormatter {
    /// Renders the report as text: title, basic statistics, size distribution,
    /// the largest files and (when enabled) the directory tree.
    fn format_report(&self, report: &DirectoryReport) -> String {
        // Display order of the known size buckets, smallest first.
        const CATEGORY_ORDER: [&str; 5] = [
            "< 1KB",
            "1KB - 1MB",
            "1MB - 100MB",
            "100MB - 1GB",
            "> 1GB",
        ];

        // Section headings are yellow/bold when colours are enabled.
        let heading = |text: &str| -> String {
            if self.use_colors {
                format!("{}\n", text.yellow().bold())
            } else {
                format!("{}\n", text)
            }
        };

        let mut output = String::new();

        // Title
        let title = "目錄分析報告";
        if self.use_colors {
            output.push_str(&format!("{}\n", title.cyan().bold()));
        } else {
            output.push_str(&format!("{}\n", title));
        }
        output.push_str(&"=".repeat(50));
        output.push_str("\n\n");

        // Basic statistics
        output.push_str(&format!("分析路徑: {}\n", report.root_path.display()));
        output.push_str(&format!("總大小: {}\n", self.format_size(report.total_size)));
        output.push_str(&format!("文件數量: {}\n", report.file_count));
        output.push_str(&format!("目錄數量: {}\n", report.dir_count));
        output.push_str("\n");

        // Size distribution. HashMap iteration order is unspecified, so sort the
        // buckets: known labels by size, any other label alphabetically after them.
        output.push_str(&heading("大小分佈:"));
        let mut distribution: Vec<(&String, &u64)> = report.size_distribution.iter().collect();
        distribution.sort_by_key(|&(category, _)| {
            let rank = CATEGORY_ORDER
                .iter()
                .position(|known| *known == category.as_str())
                .unwrap_or(CATEGORY_ORDER.len());
            (rank, category)
        });
        for (category, size) in distribution {
            output.push_str(&format!(" {}: {}\n", category, self.format_size(*size)));
        }
        output.push_str("\n");

        // Largest files
        output.push_str(&heading("前10大文件:"));
        for (i, file) in report.largest_files.iter().take(10).enumerate() {
            output.push_str(&format!(
                "{}. {} - {}\n",
                i + 1,
                file.path.display(),
                self.format_size(file.size)
            ));
        }
        output.push_str("\n");

        // Directory tree
        if self.show_tree {
            output.push_str(&heading("目錄結構:"));
            output.push_str(&self.format_tree(&report.directory_tree, "", true));
        }

        output
    }
}
/// Report renderer that emits pretty-printed JSON.
pub struct JsonFormatter;

impl ReportFormatter for JsonFormatter {
    /// Serializes `report` as indented JSON; if serialization fails, the returned
    /// string is the error message instead.
    fn format_report(&self, report: &DirectoryReport) -> String {
        match serde_json::to_string_pretty(report) {
            Ok(json) => json,
            Err(err) => format!("JSON 序列化錯誤: {}", err),
        }
    }
}
mod analyzer;
mod formatter;
use analyzer::DirectoryAnalyzer;
use formatter::{JsonFormatter, ReportFormatter, TextFormatter};
use anyhow::Result;
use clap::{Parser, ValueEnum};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
/// One node of the scanned file-system tree (a file or a directory).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
/// Path of the entry as it was reached during the scan.
pub path: PathBuf,
/// Size in bytes: the file length for files, the sum of the scanned children for directories.
pub size: u64,
/// `true` when the scanner recorded this entry as a directory, `false` for a file.
pub is_dir: bool,
/// `None` for files; for directories, the scanned child entries (may be empty).
pub children: Option<Vec<FileInfo>>,
}
/// Aggregated result of analyzing one root path.
#[derive(Debug, Serialize, Deserialize)]
pub struct DirectoryReport {
/// The path that was analyzed.
pub root_path: PathBuf,
/// Size in bytes recorded on the root node of `directory_tree`.
pub total_size: u64,
/// Number of file nodes found in `directory_tree`.
pub file_count: usize,
/// Number of directory nodes found in `directory_tree` (the root included when it is a directory).
pub dir_count: usize,
/// The largest files, biggest first (the analyzer keeps at most ten).
pub largest_files: Vec<FileInfo>,
/// The full scanned tree, rooted at `root_path`.
pub directory_tree: FileInfo,
/// Total bytes per size-category label (for example "1KB - 1MB").
pub size_distribution: HashMap<String, u64>,
}
// Report output formats selectable with `--format`.
// Plain `//` comments are used here because clap's derive reads doc comments.
#[derive(Debug, Clone, ValueEnum)]
enum OutputFormat {
// Human-readable text, rendered by `TextFormatter`.
Text,
// Pretty-printed JSON, rendered by `JsonFormatter`.
Json,
}
// Command-line arguments, parsed with clap's derive API.
// NOTE: the `///` comments on the fields are left exactly as written because
// clap's derive turns field doc comments into the help text shown to the user.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
// Positional argument: the path to analyze.
/// 要分析的目錄路徑
#[arg(value_name = "PATH")]
path: PathBuf,
// `-f` / `--format`: report format, text by default.
/// 輸出格式
#[arg(short, long, value_enum, default_value_t = OutputFormat::Text)]
format: OutputFormat,
// `-m` / `--max-depth`: optional recursion limit passed to the analyzer.
/// 最大遞歸深度
#[arg(short, long)]
max_depth: Option<usize>,
// `-L` / `--follow-symlinks`: scan symbolic links instead of skipping them.
/// 跟隨符號連結
#[arg(short = 'L', long)]
follow_symlinks: bool,
// `-a` / `--include-hidden`: include entries whose name starts with a dot.
/// 包含隱藏文件
#[arg(short = 'a', long)]
include_hidden: bool,
// `--no-tree`: omit the directory tree section from the text report.
/// 不顯示目錄樹
#[arg(long)]
no_tree: bool,
// `--no-color`: disable coloured text output.
/// 不使用顏色
#[arg(long)]
no_color: bool,
// `-o` / `--output`: write the report to this file instead of stdout.
/// 輸出到文件
#[arg(short, long)]
output: Option<PathBuf>,
}
/// Entry point: parses the command line, analyzes the requested path and
/// prints the report or writes it to the file given with `--output`.
///
/// # Errors
///
/// Returns an error when the analysis fails or the output file cannot be written.
fn main() -> Result<()> {
    let args = Args::parse();

    // Configure the analyzer from the command-line flags.
    let mut analyzer = DirectoryAnalyzer::new()
        .follow_symlinks(args.follow_symlinks)
        .include_hidden(args.include_hidden);
    if let Some(depth) = args.max_depth {
        analyzer = analyzer.max_depth(depth);
    }

    // Progress goes to stderr so that stdout carries only the report; this keeps
    // `--format json` output parseable when it is piped to another program.
    eprintln!("正在分析目錄: {}", args.path.display());
    let report = analyzer.analyze(&args.path)?;

    let formatted_report = match args.format {
        OutputFormat::Text => {
            // A report saved to a file should not contain ANSI escape sequences.
            let use_colors = !args.no_color && args.output.is_none();
            let formatter = TextFormatter::new()
                .show_tree(!args.no_tree)
                .use_colors(use_colors);
            formatter.format_report(&report)
        }
        OutputFormat::Json => {
            let formatter = JsonFormatter;
            formatter.format_report(&report)
        }
    };

    // Emit the result.
    if let Some(output_path) = args.output {
        std::fs::write(&output_path, &formatted_report)?;
        println!("報告已保存到: {}", output_path.display());
    } else {
        print!("{}", formatted_report);
    }
    Ok(())
}
cargo build --release
# 分析當前目錄
cargo run -- .
# 分析特定目錄,限制深度為3
cargo run -- /home/user/documents --max-depth 3
# 包含隱藏文件並輸出為JSON
cargo run -- . --include-hidden --format json
# 將報告保存到文件
cargo run -- . --output report.txt
# 跟隨符號連結
cargo run -- . --follow-symlinks
好誒